##########################################################################################################################################
###### This file estimates the control variables and allocates everything to dataframes
###############################
# The dataframes saved here are later used in the main analyses
#######################################
# There are other code files that are used for data preparation
# This processed data are saved and loaded in this code


# set to your working directory:
setwd("C:\\Users\\Admin\\OneDrive\\Desktop\\Market Power and Systematic Risk - Data and Code")
getwd()

######################
# This is a dataset from CRSP containing the following elements:
# PERMNO,	date,	SHRCD,	SICCD,	TICKER,	COMNAM,	NAICS,	PERMCO,	CUSIP,	PRC,	VOL,	SHROUT
CRSP = read.table(unz("CRSP_txt.zip","5a072198a36ffd3d.txt"),sep="\t",header=T)
#######################
library(data.table)
setDT(CRSP)
#Sometimes you see negative stock prices in CRSP. This means that there was no closing price available for that period. 
#Instead, the bid/ask average was used. To distinguish the bid/ask averages from actual closing prices, 
#CRSP puts a leading dash in front of the price when the bid/ask average was used. If neither price nor bid/ask average is available, 
#Price or Bid/Ask Average is set to zero.
CRSP$PRC[CRSP$PRC<0 & !is.na(CRSP$PRC)] = CRSP$PRC[CRSP$PRC<0 & !is.na(CRSP$PRC)]*(-1)
#Remove all occurences of zero prices:
CRSP = CRSP[CRSP$PRC != 0,]
#Firm Age
#Is one plus the current year minus the year the record first appears on the CRSP tapes:
#Create a new vector age and save the year of the first record
CRSP$Age = rep(NA,nrow(CRSP))
library(stringi)
CRSP$Age = CRSP[,rep(as.numeric(unlist(lapply(strsplit(as.character(date),split=""),function(x) stri_paste(head(x,n=4),collapse=""))))[1],nrow(.SD)),by="PERMNO"]$V1
CRSP$Age = 1 + as.numeric(unlist(lapply(strsplit(as.character(CRSP$date),split=""),function(x) stri_paste(head(x,n=4),collapse="")))) - CRSP$Age
#Momentum
#cumulative stock return from t-12 to t-1
CRSP$Return = CRSP[,c(NA,diff(log(PRC))),by="PERMNO"]$V1
Momentum_function = function(DT){
  #Remove the return of the last month, by substracting it again!
  Momentum_Return = frollsum(DT$Return,n=13,na.rm=T) - DT$Return
  return(Momentum_Return)
}
CRSP$Momentum = CRSP[,Momentum_function(.SD),by="PERMNO"]$V1
#Illiqudity
#absolute value of the stock's return divided by the dollar volume, averaged over the previous year.
Amihud_Illiq = function(DT){
  Illiquidity = frollmean(abs(DT$Return)/DT$VOL,n=12,na.rm=T)
  return(Illiquidity)
}
CRSP$Illiquidity = CRSP[ ,Amihud_Illiq(.SD),by = "PERMNO"]$V1
#Size
#current market capitalization of a firm. Market capitalization is computed as the product of the stock price and the number of shares outstanding
CRSP$MktCap = CRSP$PRC * CRSP$SHROUT

######################################
# this is a Compustat data file containing the following elements:
# "gvkey"    "cusip"    "tic"      "conm"     "datadate" "fyr"      "fyear"    "sich"     "naicsh"   "gp"       "at"      
# "capx"     "ceq"      "csho"     "dltt"     "ibcom"    "itci"     "lt"       "invt"     "oibdp"    "ppegt"    "pstk"    
# "pstkl"    "pstkrv"   "sale"     "seq"      "txdb"     "txdi"     "txditc"   "exchg"    "cik"      "costat"   "dvpsp_c" 
# "dvpsx_c"  "prcc_c"   "dvpsp_f"  "dvpsx_f"  "mkvalt"   "prcc_f" 
load("Compustat.RData")

Compustat$numgvkey = as.numeric(Compustat$gvkey)
Compustat$sic3 = as.numeric(substr(Compustat$sich,start=1,stop=3))

##################################
# these data are from the Hoberg and Philips data library:
# https://hobergphillips.tuck.dartmouth.edu/
HobergPhillips = read.table(unz("FluidityData.zip","FluidityData.txt"),header=T,sep="")

HobergPhillips_Two = read.table(unz("TNIC3HHIdata.zip","TNIC3HHIdata.txt"),header=T,sep="",fill=T)

HobergPhillips_Three = read.table(unz("HP2010_FittedHHIdata.zip","Fitted_herfindahl_Hoberg_Phillips.txt"),header=T,sep="",fill=T)
###############################

Compustat <- merge(Compustat, HobergPhillips, by.x=c("fyear","numgvkey"), by.y=c("year","gvkey"),all.x=T)

Compustat <- merge(Compustat, HobergPhillips_Two, by.x=c("fyear","numgvkey"), by.y=c("year","gvkey"),all.x=T)

Compustat <- merge(Compustat, HobergPhillips_Three, by.x=c("fyear","sic3"), by.y=c("year","sic3"),all.x=T)


library(data.table)
setDT(Compustat)
#In that order calculate market equity item 199 times item 25, at fiscal year end
Market_Equity = Compustat$prcc_c * Compustat$csho
#Market-to-book Ratio
#stockholders equity plus balance sheet deferred taxes and investment tax credit (item 35) minus the book value of preferred stock
#In that order calculate stockholder's equity: Compustat item 216 if not, then 60+130 if not then 6-181
Stockholder_Equity = Compustat$seq
Stockholder_Equity[is.na(Stockholder_Equity)] = (Compustat$ceq + Compustat$pstk)[is.na(Stockholder_Equity)]
Stockholder_Equity[is.na(Stockholder_Equity)] = (Compustat$at - Compustat$lt)[is.na(Stockholder_Equity)]

#In that order calculate preferred stock: 56 if not then 10 if not then 130
Pref_Stock = Compustat$pstkrv
Pref_Stock[is.na(Pref_Stock)] = Compustat$pstkl[is.na(Pref_Stock)]
Pref_Stock[is.na(Pref_Stock)] = Compustat$pstk[is.na(Pref_Stock)]
Book_Equity = rep(NA,length(Stockholder_Equity))
Book_Equity[!is.na(Stockholder_Equity) & !is.na(Compustat$txditc) & !is.na(Pref_Stock)] = Stockholder_Equity[!is.na(Stockholder_Equity) & !is.na(Compustat$txditc) & !is.na(Pref_Stock)] + Compustat$txditc[!is.na(Stockholder_Equity) & !is.na(Compustat$txditc) & !is.na(Pref_Stock)] - Pref_Stock[!is.na(Stockholder_Equity) & !is.na(Compustat$txditc) & !is.na(Pref_Stock)]
Book_Equity[!is.na(Stockholder_Equity) & !is.na(Compustat$txditc) & is.na(Pref_Stock)] = Stockholder_Equity[!is.na(Stockholder_Equity) & !is.na(Compustat$txditc) & is.na(Pref_Stock)] + Compustat$txditc[!is.na(Stockholder_Equity) & !is.na(Compustat$txditc) & is.na(Pref_Stock)]
Book_Equity[!is.na(Stockholder_Equity) & is.na(Compustat$txditc) & is.na(Pref_Stock)] = Stockholder_Equity[!is.na(Stockholder_Equity) & is.na(Compustat$txditc) & is.na(Pref_Stock)]
Book_Equity[!is.na(Stockholder_Equity) & is.na(Compustat$txditc) & !is.na(Pref_Stock)] = Stockholder_Equity[!is.na(Stockholder_Equity) & is.na(Compustat$txditc) & !is.na(Pref_Stock)] - Pref_Stock[!is.na(Stockholder_Equity) & is.na(Compustat$txditc) & !is.na(Pref_Stock)]
#Omit all variables, with Book Value lower than zero:
Book_Equity[Book_Equity<0] = NA

Compustat$Mkt_Book = Market_Equity/Book_Equity
#Dividend Dummy
Compustat$Dividend_Dummy = ifelse(Compustat$dvpsx_c>0,1,0)
Compustat$Dividend_Dummy[is.na(Compustat$Dividend_Dummy)] = ifelse(Compustat$dvpsp_c[is.na(Compustat$Dividend_Dummy)]>0,1,0)
#Log of total Assets
Compustat$Logat = log(Compustat$at)
#Current firm return on equity
ROE = rep(NA,length(Stockholder_Equity))
ROE[!is.na(Compustat$ibcom) & !is.na(Compustat$txdi) & !is.na(Compustat$itci)] = Compustat$ibcom[!is.na(Compustat$ibcom) & !is.na(Compustat$txdi) & !is.na(Compustat$itci)] + Compustat$txdi[!is.na(Compustat$ibcom) & !is.na(Compustat$txdi) & !is.na(Compustat$itci)] + Compustat$itci[!is.na(Compustat$ibcom) & !is.na(Compustat$txdi) & !is.na(Compustat$itci)]
ROE[!is.na(Compustat$ibcom) & !is.na(Compustat$txdi) & is.na(Compustat$itci)] = Compustat$ibcom[!is.na(Compustat$ibcom) & !is.na(Compustat$txdi) & is.na(Compustat$itci)] + Compustat$txdi[!is.na(Compustat$ibcom) & !is.na(Compustat$txdi) & is.na(Compustat$itci)]
ROE[!is.na(Compustat$ibcom) & is.na(Compustat$txdi) & is.na(Compustat$itci)] = Compustat$ibcom[!is.na(Compustat$ibcom) & is.na(Compustat$txdi) & is.na(Compustat$itci)]
Compustat$ROE = ROE/Book_Equity
#Investment rates
Compustat$InvestmentRates = Compustat[,capx/shift(ppegt,n=1),by="cusip"]$V1
#Tobins q
CRSP_Temp = CRSP[as.numeric(unlist(lapply(strsplit(as.character(CRSP$date),split=""),function(x) stri_paste(tail(head(x,n=6),n=2),collapse=""))))==12,]

#Firm size
Compustat$Firm_size = Compustat$prcc_c * Compustat$csho
#Operating Leverage
#3-year moving average of the percentage change in operating income:
Compustat$Operating_Leverage = Compustat[,frollmean((oibdp-shift(oibdp,n=1))/oibdp * 100,n=3,na.rm=T)/((sale-shift(sale,n=1))/sale * 100),by="cusip"]$V1
#Financial Leverage:
Compustat$Financial_Leverage = Compustat$at/Compustat$Firm_size
#Default spread
###################
# This data is from Amit Goyal's webiste (https://sites.google.com/view/agoyal145)
GoyalWelch = read.table("PredictorData2019.csv",header=T,dec=",",sep=";")
#####################
DefaultSpread = data.frame(Date=GoyalWelch$yyyymm,Default_Spread=(GoyalWelch$BAA - GoyalWelch$AAA))
#We can use this and merge it by the date:
library(stringi)
library(lubridate)
Compustat$yearMonth = as.numeric(unlist(lapply(strsplit(as.character(Compustat$datadate %m+% months(4)),split="-"),function(x) stri_paste(head(x,n=2),collapse=""))))
Compustat = merge(Compustat,DefaultSpread,by.x="yearMonth",by.y="Date")


load("PANEL_NAICS.RData")

PANEL_New = merge(PANEL_NAICS,CRSP[,c("PERMNO","date","SHRCD","SICCD","TICKER","COMNAM","PERMCO",
                                      "CUSIP","Age","Momentum","Illiquidity","MktCap")],by.x=c("PERMNO","Time"),by.y=c("PERMNO","date"),all.x=T)


######
# this is the Link Table from the CRSP-Compustat Merged database
# it contains the elements: gvkey,LPERMNO,LINKDT,LINKENDDT
Link_Table = read.table("LinkTable.txt",header=T,sep=",")
#############
library(stringi)
Compustat$gvkey = as.numeric(Compustat$gvkey)
#First we can remove all companies without any sales data:
Compustat$datadate = as.Date(as.character(Compustat$datadate),format="%Y-%m-%d")
#Just match the time of Grullon et al. (2019)
Compustat = Compustat[Compustat$datadate>"1972-01-01",]
#The Link Table has multiple LPERMNOs with one gvkey:
#So also match them within the time-frame defined by LINKDT and LINKENDDT
Compustat$PERMNO = rep(NA,nrow(Compustat))
Compustat$PERMNO = as.numeric(Compustat$PERMNO)
for(k in which(!duplicated(Compustat$gvkey))){
  if(length(Link_Table$LINKENDDT[Link_Table$gvkey %in% Compustat$gvkey[k]])!=0){
    
    for(j in 1:length(Link_Table$LINKENDDT[Link_Table$gvkey %in% Compustat$gvkey[k]])){
      Temp = Compustat[Compustat$gvkey == Compustat$gvkey[k],]
      Temp_Seq = c()
      if(is.na(as.numeric(format(as.Date(as.character(Link_Table$LINKENDDT[Link_Table$gvkey %in% Compustat$gvkey[k]][j]),format="%Y%m%d"),"%Y")))){
        Temp_Seq = seq(from = as.numeric(format(as.Date(as.character(Link_Table$LINKDT[Link_Table$gvkey %in% Compustat$gvkey[k]][j]),format="%Y%m%d"),"%Y")),
                       to = 2020)
        Temp_Seq = unique(Temp_Seq)
        Temp_Indik = Temp$fyear %in% Temp_Seq
        Temp = Temp[Temp$fyear %in% Temp_Seq,] 
        #
        
        Compustat[Compustat$gvkey == Compustat$gvkey[k],]$PERMNO[Temp_Indik] = rep(Link_Table$LPERMNO[Link_Table$gvkey %in% Compustat$gvkey[k]][j],nrow(Temp))
        
      }else{
        Temp_Seq = seq(from = as.numeric(format(as.Date(as.character(Link_Table$LINKDT[Link_Table$gvkey %in% Compustat$gvkey[k]][j]),format="%Y%m%d"),"%Y")),
                       to = as.numeric(format(as.Date(as.character(Link_Table$LINKENDDT[Link_Table$gvkey %in% Compustat$gvkey[k]][j]),format="%Y%m%d"),"%Y")))
        
        Temp_Seq = unique(Temp_Seq)
        Temp_Indik = Temp$fyear %in% Temp_Seq
        Temp = Temp[Temp$fyear %in% Temp_Seq,] 
        #
        
        Compustat[Compustat$gvkey == Compustat$gvkey[k],]$PERMNO[Temp_Indik] = rep(Link_Table$LPERMNO[Link_Table$gvkey %in% Compustat$gvkey[k]][j],nrow(Temp))
      }
    }
  }
}
#Eliminate all Compustat data with no PERMNOs:
Compustat = Compustat[!is.na(Compustat$PERMNO),]
#create a year indicator
PANEL_New$year = as.numeric(unlist(lapply(strsplit(as.character(PANEL_New$Time),split=""),function(x) stri_paste(head(x,n=4),collapse=""))))

#Use the following columns:
Compustat = Compustat[!duplicated(paste0(Compustat$datadate,Compustat$PERMNO)),]
PANEL_New$Time = as.Date(as.character(PANEL_New$Time),format="%Y%m%d")
#Merge them by the date from the Compustat data (Year Month)
PANEL_New$YearMonth = as.numeric(format(PANEL_New$Time,"%Y%m"))
#Fama and French stuff
library(lubridate)
#Move it forward
Compustat$YearMonth = as.numeric(format(Compustat$datadate %m+% months(4),"%Y%m"))

Panel_New = merge(PANEL_New, Compustat, by = c("PERMNO","YearMonth"), all.x=T)
options(warn=2)
for(i in which(!duplicated(Panel_New$PERMNO))){
  Temp = Panel_New[Panel_New$PERMNO == Panel_New$PERMNO[i],]
  #Choose the subset that is actually from Compustat (so only available annually)
  Temp_Compustat = Temp[,  c("gvkey","cusip","tic","conm","datadate","fyr","fyear","sich",
                             "naicsh","gp","at","capx","ceq","csho","dltt","ibcom","itci",
                             "lt","invt","oibdp","ppegt","pstk","pstkl","pstkrv","sale","seq",
                             "txdb","txdi","txditc","exchg","cik","costat","dvpsp_c","dvpsx_c",
                             "prcc_c","dvpsp_f","dvpsx_f","mkvalt","prcc_f","Mkt_Book","Dividend_Dummy",
                             "Logat","ROE","InvestmentRates","Firm_size","Operating_Leverage","Financial_Leverage",
                             "Default_Spread","prodmktfluid","tnic3tsimm","tnic3hhi","fithhi")]
  if(all(is.na(Temp_Compustat))){
    #If we have no Compustat data
    #In that case, we do nothing!
    
  }else if(sum(apply(Temp_Compustat,1,function(x)any(!is.na(x)))) == 1){
    #If we only have one row that is not NA
    #Then we only constantly extrapolate for the same year!
    library(zoo)
    for(j in 1:dim(Temp_Compustat)[2]){
      Temp_Compustat[,j] = na.locf(Temp_Compustat[,j], rule = "constant",na.rm=F)
    }
    
  }else{
    #If we have more than one row of Compustat observations
    #First use constant Interpolation everywhere
    #Only apply this to rows that do not only contain NAs
    library(zoo)
    for(j in 1:dim(Temp_Compustat)[2]){
    Temp_Compustat[,j] = na.locf(Temp_Compustat[,j], rule = "constant",na.rm=F)
    }
  }
  Temp[,  c("gvkey","cusip","tic","conm","datadate","fyr","fyear","sich",
            "naicsh","gp","at","capx","ceq","csho","dltt","ibcom","itci",
            "lt","invt","oibdp","ppegt","pstk","pstkl","pstkrv","sale","seq",
            "txdb","txdi","txditc","exchg","cik","costat","dvpsp_c","dvpsx_c",
            "prcc_c","dvpsp_f","dvpsx_f","mkvalt","prcc_f","Mkt_Book","Dividend_Dummy",
            "Logat","ROE","InvestmentRates","Firm_size","Operating_Leverage","Financial_Leverage",
            "Default_Spread","prodmktfluid","tnic3tsimm","tnic3hhi","fithhi")] = Temp_Compustat
  
  Panel_New[Panel_New$PERMNO == Panel_New$PERMNO[i],] = Temp
  if(i%%1000 == 0){
    print(paste0(i/nrow(Panel_New) * 100," Percent"))
  }
}
options(warn=1)

save(Panel_New,file = "Control_Panel.RData")
#For Daily Beta Estimation:
#save(Panel_New,file = "Control_Panel_Daily.RData")

##### Create Tobin's q and indiosyncratic volatility:
#Tobin's q:
Panel_New$q = rep(NA, nrow(Panel_New))
#One is na:
Indik = is.na(Panel_New$dltt)
Panel_New$q[Indik] = (Panel_New$MktCap[Indik] + Panel_New$pstkrv[Indik] - Panel_New$invt[Indik] - Panel_New$txdb[Indik])/Panel_New$ppegt[Indik]
Indik = is.na(Panel_New$pstkrv)
Panel_New$q[Indik] = (Panel_New$MktCap[Indik] + Panel_New$dltt[Indik] - Panel_New$invt[Indik] - Panel_New$txdb[Indik])/Panel_New$ppegt[Indik]
Indik = is.na(Panel_New$invt)
Panel_New$q[Indik] = (Panel_New$MktCap[Indik] + Panel_New$dltt[Indik] + Panel_New$pstkrv[Indik] - Panel_New$txdb[Indik])/Panel_New$ppegt[Indik]
Indik = is.na(Panel_New$txdb)
Panel_New$q[Indik] = (Panel_New$MktCap[Indik] + Panel_New$dltt[Indik] + Panel_New$pstkrv[Indik] - Panel_New$invt[Indik])/Panel_New$ppegt[Indik]
#Two are na:
Indik = is.na(Panel_New$dltt) & is.na(Panel_New$invt)
Panel_New$q[Indik] = (Panel_New$MktCap[Indik] + Panel_New$pstkrv[Indik] - Panel_New$txdb[Indik])/Panel_New$ppegt[Indik]
Indik = is.na(Panel_New$pstkrv) & is.na(Panel_New$invt)
Panel_New$q[Indik] = (Panel_New$MktCap[Indik] + Panel_New$dltt[Indik] - Panel_New$txdb[Indik])/Panel_New$ppegt[Indik]
Indik = is.na(Panel_New$txdb) & is.na(Panel_New$invt)
Panel_New$q[Indik] = (Panel_New$MktCap[Indik] + Panel_New$dltt[Indik] + Panel_New$pstkrv[Indik])/Panel_New$ppegt[Indik]
#Two are na:
Indik = is.na(Panel_New$dltt) & is.na(Panel_New$txdb)
Panel_New$q[Indik] = (Panel_New$MktCap[Indik] + Panel_New$pstkrv[Indik] - Panel_New$invt[Indik])/Panel_New$ppegt[Indik]
Indik = is.na(Panel_New$pstkrv) & is.na(Panel_New$txdb)
Panel_New$q[Indik] = (Panel_New$MktCap[Indik] + Panel_New$dltt[Indik] - Panel_New$invt[Indik])/Panel_New$ppegt[Indik]

Indik = (!is.na(Panel_New$dltt) | !is.na(Panel_New$pstkrv) | !is.na(Panel_New$invt) | !is.na(Panel_New$txdb) |
           !(is.na(Panel_New$dltt) & is.na(Panel_New$invt)) | !(is.na(Panel_New$pstkrv) & is.na(Panel_New$invt)) |
           !(is.na(Panel_New$txdb) & is.na(Panel_New$invt)) | !(is.na(Panel_New$dltt) & is.na(Panel_New$txdb)) |
           !(is.na(Panel_New$pstkrv) & is.na(Panel_New$txdb)))
Panel_New$q[Indik] = (Panel_New$MktCap[Indik] + Panel_New$dltt[Indik] + Panel_New$pstkrv[Indik] - Panel_New$invt[Indik] - Panel_New$txdb[Indik])/Panel_New$ppegt[Indik]

save(Panel_New,file = "Control_Panel.RData")
#Daily
#save(Panel_New,file = "Control_Panel_Daily.RData")












